Don't do any backlink batches if there are no backlinks.
[lhc/web/wiklou.git] / includes / BacklinkCache.php
1 <?php
2
/**
 * Class for fetching backlink lists, approximate backlink counts and partitions.
 * Instances of this class should typically be fetched with $title->getBacklinkCache().
 *
 * Ideally you should only get your backlinks from here when you think there is some
 * advantage in caching them. Otherwise it's just a waste of memory.
 */
class BacklinkCache {
	/**
	 * Per-process partition cache, indexed as [table][batchSize]. Each entry is
	 * an array with the keys 'numRows' and 'batches' (see partitionResult()).
	 */
	public $partitionCache = array();

	/** Per-process cache of full (unranged) backlink query results, indexed by table name */
	public $fullResultCache = array();

	/** The title whose backlinks are being fetched */
	public $title;

	/** Database object used for queries; set lazily by getDB() or explicitly by setDB() */
	public $db;

	/** Expiry passed to $wgMemc->set() for partition entries (presumably seconds) */
	const CACHE_EXPIRY = 3600;

	/**
	 * Create a new BacklinkCache
	 *
	 * @param $title The title to fetch backlinks for. The methods of this class call
	 *   getNamespace(), getDBkey() and getPrefixedDBkey() on it.
	 */
	public function __construct( $title ) {
		$this->title = $title;
	}

	/**
	 * Clear locally stored data
	 */
	public function clear() {
		$this->partitionCache = array();
		$this->fullResultCache = array();
		// A null property is "not set" for the isset() check in getDB(), so the
		// next getDB() call fetches a fresh connection.
		$this->db = null;
	}

	/**
	 * Set the Database object to use
	 */
	public function setDB( $db ) {
		$this->db = $db;
	}

	/**
	 * Get the Database object to use, falling back to wfGetDB( DB_SLAVE ) if
	 * none has been set.
	 */
	protected function getDB() {
		if ( !isset( $this->db ) ) {
			$this->db = wfGetDB( DB_SLAVE );
		}
		return $this->db;
	}

	/**
	 * Get the backlinks for a given table. Cached in process memory only.
	 * Queries restricted by $startId and/or $endId are never cached.
	 *
	 * @param string $table
	 * @param mixed $startId Lowest "from" page ID to include, or false for no lower bound
	 * @param mixed $endId Highest "from" page ID to include, or false for no upper bound
	 * @return TitleArray
	 */
	public function getLinks( $table, $startId = false, $endId = false ) {
		wfProfileIn( __METHOD__ );

		if ( $startId || $endId ) {
			// Partial range, not cached
			wfDebug( __METHOD__.": from DB (uncacheable range)\n" );
			$res = $this->queryLinks( $table, $startId, $endId );
		} else {
			if ( !isset( $this->fullResultCache[$table] ) ) {
				wfDebug( __METHOD__.": from DB\n" );
				$this->fullResultCache[$table] = $this->queryLinks( $table );
			}
			$res = $this->fullResultCache[$table];
		}

		$ta = TitleArray::newFromResult( $res );
		wfProfileOut( __METHOD__ );
		return $ta;
	}

	/**
	 * Run the backlink query for a table, optionally restricted to a range of
	 * "from" page IDs.
	 *
	 * Rows are ordered by the "from" field. partitionResult() picks range
	 * boundaries by row position, so the boundaries only describe contiguous
	 * ID ranges when the result is in ID order.
	 *
	 * @param string $table
	 * @param mixed $startId
	 * @param mixed $endId
	 * @return The result object returned by the database's select()
	 */
	protected function queryLinks( $table, $startId = false, $endId = false ) {
		$conds = $this->getConditions( $table );
		// Use the from field in the condition rather than the joined page_id,
		// because databases are stupid and don't necessarily propagate indexes.
		$fromField = $this->getPrefix( $table ) . '_from';
		if ( $startId ) {
			$conds[] = "$fromField >= " . intval( $startId );
		}
		if ( $endId ) {
			$conds[] = "$fromField <= " . intval( $endId );
		}
		return $this->getDB()->select(
			array( 'page', $table ),
			array( 'page_namespace', 'page_title', 'page_id' ),
			$conds,
			// Keep the caller name that getLinks() has always reported to the DB layer
			__CLASS__ . '::getLinks',
			array( 'ORDER BY' => $fromField )
		);
	}

	/**
	 * Get the field name prefix for a given table
	 *
	 * @param string $table
	 * @return string
	 * @throws MWException if the table is not a known links table
	 */
	protected function getPrefix( $table ) {
		static $prefixes = array(
			'pagelinks' => 'pl',
			'imagelinks' => 'il',
			'categorylinks' => 'cl',
			'templatelinks' => 'tl',
			'redirect' => 'rd',
		);
		if ( isset( $prefixes[$table] ) ) {
			return $prefixes[$table];
		}
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	/**
	 * Get the SQL condition array for selecting backlinks, with a join on the page table
	 *
	 * @param string $table
	 * @return array
	 * @throws MWException if the table is not a known links table
	 */
	protected function getConditions( $table ) {
		$prefix = $this->getPrefix( $table );
		switch ( $table ) {
			case 'pagelinks':
			case 'templatelinks':
			case 'redirect':
				// These tables identify the target by namespace and title
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title" => $this->title->getDBkey(),
					"page_id={$prefix}_from"
				);
				break;
			case 'imagelinks':
				$conds = array(
					'il_to' => $this->title->getDBkey(),
					'page_id=il_from'
				);
				break;
			case 'categorylinks':
				$conds = array(
					'cl_to' => $this->title->getDBkey(),
					'page_id=cl_from',
				);
				break;
			default:
				throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
		}
		return $conds;
	}

	/**
	 * Get the approximate number of backlinks.
	 *
	 * The count is taken from the cached full result if there is one, then from
	 * any cached partition entry for the table, and only then from a new query.
	 *
	 * @param string $table
	 * @return integer
	 */
	public function getNumLinks( $table ) {
		if ( isset( $this->fullResultCache[$table] ) ) {
			return $this->fullResultCache[$table]->numRows();
		}
		if ( isset( $this->partitionCache[$table] ) ) {
			// Every batch size records the same kind of row count, so any entry will do
			$entry = reset( $this->partitionCache[$table] );
			if ( is_array( $entry ) && isset( $entry['numRows'] ) ) {
				return $entry['numRows'];
			}
		}
		$titleArray = $this->getLinks( $table );
		return $titleArray->count();
	}

	/**
	 * Partition the backlinks into batches.
	 * Returns an array giving the start and end of each range. The first batch has
	 * a start of false, and the last batch has an end of false. If there are no
	 * backlinks, the returned array is empty.
	 *
	 * @param string $table The links table name
	 * @param integer $batchSize
	 * @return array
	 * @throws MWException if $batchSize is not greater than zero
	 */
	public function partition( $table, $batchSize ) {
		global $wgMemc;

		$this->checkBatchSize( $batchSize );

		// Try cache
		if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
			wfDebug( __METHOD__.": got from partition cache\n" );
			return $this->partitionCache[$table][$batchSize]['batches'];
		}

		// The cache slot is only written once an entry has actually been built.
		// Writing a placeholder first would leave a bogus "hit" behind if any of
		// the steps below threw an exception.

		// Try full result cache
		if ( isset( $this->fullResultCache[$table] ) ) {
			$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
			$this->partitionCache[$table][$batchSize] = $cacheEntry;
			wfDebug( __METHOD__.": got from full result cache\n" );
			return $cacheEntry['batches'];
		}

		// Try memcached
		$memcKey = wfMemcKey( 'backlinks', md5( $this->title->getPrefixedDBkey() ),
			$table, $batchSize );
		$memcValue = $wgMemc->get( $memcKey );
		if ( is_array( $memcValue ) && isset( $memcValue['batches'] ) ) {
			$this->partitionCache[$table][$batchSize] = $memcValue;
			wfDebug( __METHOD__.": got from memcached $memcKey\n" );
			return $memcValue['batches'];
		}

		// Fetch from database; getLinks() fills fullResultCache[$table]
		$this->getLinks( $table );
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		$this->partitionCache[$table][$batchSize] = $cacheEntry;
		// Save to memcached
		$wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );
		wfDebug( __METHOD__.": got from database\n" );
		return $cacheEntry['batches'];
	}

	/**
	 * Partition a DB result with backlinks in it into batches
	 *
	 * @param $res Result object providing numRows(), seek() and fetchObject(),
	 *   with a page_id field on each row
	 * @param integer $batchSize
	 * @return array Array with the keys 'numRows' (row count of $res) and
	 *   'batches' (list of array( start, end ) pairs)
	 * @throws MWException if $batchSize is not greater than zero
	 */
	protected function partitionResult( $res, $batchSize ) {
		$this->checkBatchSize( $batchSize );

		$numRows = $res->numRows();
		$numBatches = (int)ceil( $numRows / $batchSize );
		$batches = array();

		// The first batch is open at the start
		$start = false;
		for ( $i = 1; $i <= $numBatches; $i++ ) {
			if ( $i < $numBatches ) {
				// The row at this position begins the next batch, so the current
				// batch ends just before its page ID. Fetch it once and use it
				// for both purposes.
				$res->seek( intval( $numRows * $i / $numBatches ) );
				$row = $res->fetchObject();
				$nextStart = (int)$row->page_id;
				$end = $nextStart - 1;
			} else {
				// The last batch is open at the end
				$nextStart = false;
				$end = false;
			}
			$batches[] = array( $start, $end );
			$start = $nextStart;
		}
		return array( 'numRows' => $numRows, 'batches' => $batches );
	}

	/**
	 * Reject batch sizes that cannot be used as a divisor when counting batches.
	 *
	 * @param mixed $batchSize
	 * @throws MWException if $batchSize is not greater than zero
	 */
	protected function checkBatchSize( $batchSize ) {
		if ( !( $batchSize > 0 ) ) {
			throw new MWException( "Invalid batch size in " . __CLASS__ );
		}
	}
}